## ----setseedch6, echo = FALSE--------------------------------------------
set.seed(1982)

knitr::opts_chunk$set(
                      echo = FALSE,
                      warning = FALSE,
                      message = FALSE,
                      error = FALSE
)


## ----loadlibsch6---------------------------------------------------------
library(dplyr)
library(tidyr)
library(reshape2)
library(zoo)
library(mclogit)
library(car)
library(memisc)
library(pander)
library(ggplot2)
library(scales)
library(lme4)
library(rstanarm)
library(stringr)
library(Amelia)
fillin <- function(x) {
    x[is.na(x)] <- mean(x, na.rm = TRUE)
    return(x)
}

source("common_funcs.R")
dodge.width <- 0.75



## ----loaddatach6---------------------------------------------------------
load("ch2_data.RData")


## ----prepdv--------------------------------------------------------------
## Define an opinion as a judgement in the same case
## which has the same start and end pages
## Later on, do a check as to whether authorship was shared
holder <- list()
for (i in 1:9) {
    holder[[i]] <- sheet1[,c("NEUTRAL", "CASEID", "TERM", "HANDDOWN",
                             paste0("JUDGE",i),
                             paste0("JUDGE",i,"DISS"),
                             paste0("JUDGE",i,"BEGINPG"),
                             paste0("JUDGE",i,"ENDPG"),
                             paste0("JUDGE",i,"COAUTH",1:8))]
    names(holder[[i]]) <- c("NEUTRAL", "CASEID", "TERM", "HANDDOWN","JUDGE","DISS","BEGINPG","ENDPG",
                            paste0("COAUTH",1:8))
}
holder <- do.call("rbind", holder)
holder <- subset(holder, !is.na(JUDGE))

judgment.df <- holder %>%
    filter(!is.na(BEGINPG)) %>% 
    group_by(NEUTRAL, CASEID, BEGINPG, ENDPG) %>%
    summarize(Judges = paste(unique(sort(JUDGE)), collapse=";"),
              DISS = unique(DISS),
              Length = unique(ENDPG) - unique(BEGINPG),
              CoAuthJudges = paste(sort(unique(c(COAUTH1, COAUTH2, COAUTH3, COAUTH4,
                                                 COAUTH5, COAUTH6, COAUTH7, COAUTH8))), sep = ";", collapse = ";"))

### In the case of coauthorship,
### `CoAuthJudges` should equal `Judges`
### with the exception of one known exception
tmp <- judgment.df[which(judgment.df$CoAuthJudges != "" & (judgment.df$CoAuthJudges != judgment.df$Judges)),]
known.exception <- c("[2012] UKSC 50",
                     "[2017] UKSC 5") ## Miller
tmp <- subset(tmp, !is.element(NEUTRAL, known.exception))
if (nrow(tmp)>0) {
    stop("There's a problem in coauthorship")
}

singlej <- judgment.df %>%
    group_by(NEUTRAL, CASEID) %>%
    summarize(nJudgments = n()) %>%
    ungroup() %>% 
    summarize(result = mean(nJudgments == 1))
singlej <- round(100 * singlej)

### Was the judgment the first?
### Was it the longest?
judgment.df <- judgment.df %>%
    group_by(NEUTRAL, CASEID) %>%
    mutate(First = (BEGINPG == min(BEGINPG)),
           Longest = (Length == max(Length)))

### Did the first judgment dissent?
if (FALSE) { 
    table(judgment.df$DISS[which(judgment.df$First == TRUE)])
    table(judgment.df$DISS[which(judgment.df$Longest == TRUE)])
    table((judgment.df$Longest == TRUE)[which(judgment.df$First == TRUE)])


}

long_by_first <- with(subset(judgment.df, DISS == "C"),
                      table(Longest, First))

first_also_long <- round(100 * (long_by_first[,"TRUE"] / sum(long_by_first[,"TRUE"])))[2]

if (FALSE) {
    tbd <- judgment.df$CASEID[which(judgment.df$Longest == TRUE & judgment.df$First == FALSE & judgment.df$DISS == "C")]
    tbd <- judgment.df[judgment.df$CASEID %in% tbd,]
    tbd$MostImportant <- NA
    write.csv(tbd, file = "data/tbd.csv", row.names = FALSE)
}

## Let's take the first one
judgment.bak <- judgment.df
judgment.df <- judgment.df %>%
    group_by(NEUTRAL, CASEID) %>%
    subset(First == TRUE) %>%
    ungroup() %>% 
    mutate(JUDGE = strsplit(as.character(Judges), ";")) %>%
    unnest(JUDGE) %>%
    dplyr::select(NEUTRAL, CASEID, JUDGE, DISS)

judgment.df$Lead <- TRUE
old <- nrow(holder)
dat <- merge(holder, judgment.df,
                all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)
dat$Lead[is.na(dat$Lead)] <- FALSE
dat$Lead <- as.numeric(dat$Lead)

## Now let's take the longest one
judgment.df <- judgment.bak %>%
    group_by(NEUTRAL, CASEID) %>%
    subset(Longest == TRUE) %>%
    ungroup() %>% 
    mutate(JUDGE = strsplit(as.character(Judges), ";")) %>%
    unnest(JUDGE) %>%
    dplyr::select(NEUTRAL, CASEID, JUDGE, DISS)

judgment.df$Lead.alt <- TRUE
old <- nrow(holder)
dat <- merge(dat, judgment.df,
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)
dat$Lead.alt[is.na(dat$Lead.alt)] <- FALSE
dat$Lead.alt <- as.numeric(dat$Lead.alt)



## ---- fig = TRUE, fig.cap = "Proportion of cases with a single substantive opinion by year and term. M = Michaelmas (October to December), H = Hilary (January to April); E = Easter term (April to May); T = Trinity (June to July).", fig.width = 6, fig.height = 3.5----
plotbase <- holder %>%
    group_by(TERM, NEUTRAL, HANDDOWN, CASEID) %>%
    mutate(nJudges = n()) %>% 
    filter(!is.na(BEGINPG)) %>% 
    filter(!is.na(ENDPG)) %>%
    mutate(Length = ENDPG - BEGINPG,
           isSubstantive = Length > 2) %>%
    group_by(TERM, NEUTRAL, HANDDOWN, CASEID) %>%
    summarize(nSubstantive = sum(isSubstantive),
              SingleOpinion = as.numeric(nSubstantive == 1),
              nJudges = unique(nJudges))

summarize_terms <- function(df) {
    df %>%
        group_by(TERM, NEUTRAL, HANDDOWN) %>%
        summarize(nSubstantive = mean(nSubstantive),
                  SingleOpinion = as.numeric(nSubstantive == 1),
                  Year = unique(format(HANDDOWN, "%Y"))) %>%
        group_by(TERM, Year) %>%
        summarize(PercentSingle = mean(SingleOpinion == 1),
                  MeanOpinions = mean(nSubstantive),
                  Date = mean(HANDDOWN)) %>%
        arrange(Date) 

}

plot.df <- summarize_terms(plotbase)
plot.df_std <- summarize_terms(subset(plotbase, nJudges == 5))
plot.df_lrg <- summarize_terms(subset(plotbase, nJudges > 5))

plot.df_std$Date <- NULL
plot.df_lrg$Date <- NULL

plot.df$nJudges <- "All"
plot.df_std$nJudges <- "Panels of five"
plot.df_lrg$nJudges <- "Panels of seven or nine"

plot.df <- merge(merge(plot.df, plot.df_std, all = TRUE),
                 plot.df_lrg, all = TRUE)

plot.df <- plot.df %>%
    group_by(TERM, Year) %>%
    mutate(Date = unique(na.omit(Date)))

plot.df$TERM <- factor(plot.df$TERM,
                       levels = c("Hilary", "Easter", "Trinity", "Michaelmas"),
                       ordered = TRUE)
plot.df$Year <- as.numeric(plot.df$Year)

p1 <- ggplot(subset(plot.df, nJudges == "All"),
             aes(x = Date, y = PercentSingle)) +
    geom_line(alpha = 0.3) +
    geom_point(color = "white", size = 8) + 
    geom_text(aes(label = substr(TERM, 0, 1))) +
    scale_y_continuous("Judgments with one substantive opinion",
                       labels = scales::percent) +
    scale_x_date("Year and term") +
    theme_uksc()
print(p1)



## ---- fig = TRUE, fig.cap = "Mean number of substantive opinions by year and term. M = Michaelmas (October to December), H = Hilary (January to April); T = Trinity (April to May); E = Easter term (June to July).", fig.width = 6, fig.height = 3.5----

p2 <- ggplot(subset(plot.df, nJudges == "All"),
             aes(x = Date, y = MeanOpinions)) +
    geom_line(alpha = 0.3) +
    geom_point(color = "white", size = 8) + 
    geom_text(aes(label = substr(TERM, 0, 1))) +
    scale_y_continuous("Mean number of opinions", limits = c(1, NA)) +
    scale_x_date("Year and term") +
    theme_uksc()

## p2 <- ggplot(subset(plot.df, nJudges == "All"),
##              aes(x = Year, y = MeanOpinions, fill = TERM)) +
##     geom_bar(stat = "identity", position = "dodge") +
##     scale_y_continuous("Mean number of opinions") +
##     scale_x_continuous("Year and term") +
##     theme_uksc()

print(p2)



## ----judgeplot, fig = TRUE, fig.cap = "Which judges wrote more opinions than would be expected? Grey circles represent lead opinions; red squares indicate any substantive opinion. ", fig.width = 6, fig.height = 10----
### Create the expected number
dat$Wrote <- as.numeric((dat$ENDPG - dat$BEGINPG) > 2)
dat$Wrote[is.na(dat$Wrote)] <- 0
plot.df <- dat %>%
    group_by(NEUTRAL, CASEID) %>%
    mutate(expectation = 1 / length(unique(JUDGE))) %>%
    group_by(JUDGE) %>%
    summarize(nCases = length(unique(NEUTRAL)),
              nLed = sum(Lead),
              nWrote = sum(Wrote),
              ExpectedLead = sum(expectation),
              nCasesExclDissent = length(unique(NEUTRAL[which(DISS == "C")])),
              nLedExclDissent = sum(Lead[which(DISS == "C")]),
              nWroteExclDissent = sum(Wrote[which(DISS == "C")]),
              ExpectedLeadExclDissent = sum(expectation[which(DISS == "C")]))

plot.df$RAELO <- plot.df$nLedExclDissent  / plot.df$ExpectedLeadExclDissent

plot.df$RAESO <- plot.df$nWroteExclDissent  / plot.df$ExpectedLeadExclDissent

plot.df <- subset(plot.df, JUDGE %in% 1:20)
plot.df$Judge <- num2judge(plot.df$JUDGE)
plot.df$Judge <- factor(plot.df$Judge,
                        ordered = TRUE)
plot.df$Judge <- reorder(plot.df$Judge, plot.df$RAELO)


plot.df <- plot.df %>%
    dplyr::select(Judge, nCases, RAELO, RAESO) %>%
    melt(id.vars = c("Judge", "nCases"))

p3 <- ggplot(plot.df, aes(x = Judge, y = value,
                          shape = variable, size = nCases,
                          fill = variable,
                          color = variable)) +
    scale_x_discrete("") +
    geom_hline(data = plot.df %>%
                   filter(variable == "RAELO") %>%
                   summarise(meanpct = mean(value, na.rm = TRUE)),
               aes(yintercept = meanpct),
               colour = 'darkgrey') +
    geom_hline(data = plot.df %>%
                   filter(variable == "RAESO") %>%
                   summarise(meanpct = mean(value, na.rm = TRUE)),
                   aes(yintercept = meanpct),
               colour = 'darkred') +
    geom_point(stroke = 1.5) +
    scale_size_continuous("Cases heard", guide = "none") + 
    scale_y_continuous("Ratio of actual to expected opinions") +
    scale_shape_manual("Ratio",
                       labels = c("RAELO" = "Lead opinion",
                                  "RAESO" = "Substantive opinion"),
                values = my.pchs) +
    scale_fill_manual(values = c("darkgrey", "darkred"), guide = "none") +
    scale_color_manual(values = c("darkgrey", "darkred"), guide = "none") + 
    coord_flip() +     
    theme_uksc() +
    theme(legend.position = "bottom")
print(p3)

plot.df <- dcast(plot.df, Judge + nCases ~variable)
the.cor <- with(plot.df, round(cor(RAELO,RAESO), 2))
the.mod <- lm(RAESO ~ RAELO, data = plot.df)
less.substantive <- num2judge(which(resid(the.mod) > 0.3))
more.substantive <- num2judge(which(resid(the.mod) < -0.3))



## ----prepiv--------------------------------------------------------------
### Get date of oral argument
### and opinion below

old <- nrow(dat)
dat <- merge(dat, sheet1[,c("NEUTRAL", "CASEID", "HEARINGDATE1",
                            "OpinionBelow")],
             by = c("NEUTRAL", "CASEID"), 
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)

stopifnot(old == new)

### (0) Experience of judges, identity of senior judge
roles <- read.csv("office_holders.csv")
roles$Start <- as.Date(roles$Start)
roles$End <- as.Date(roles$End)
old <- nrow(dat)
dat <- merge(dat, subset(roles, Role == "Member"),
             all.x = TRUE, all.y = FALSE,
             by.x = "JUDGE", by.y = "Judge")
new <- nrow(dat)
stopifnot(old==new)

dat$Start[is.na(dat$Start)] <- as.Date("9999-12-31")
dat$DaysExperience <- dat$HEARINGDATE1 - dat$Start
dat$DaysExperience[which(dat$DaysExperience < 0)] <- 0
dat$DaysExperience[which(dat$HEARINGDATE1 > dat$End)] <-
    dat$End[which(dat$HEARINGDATE1 > dat$End)] -
    dat$Start[which(dat$HEARINGDATE1 > dat$End)]

### Presidents, vice-presidents
dat$isVP <- FALSE
dat$isPres <- FALSE
for (i in which(roles$Role == "President")) {
    prespos <- which(dat$JUDGE == roles$Judge[i] & dat$HEARINGDATE1 > roles$Start[i] & dat$HEARINGDATE1 < roles$End[i])
    dat$isPres[prespos] <- TRUE
}
for (i in which(roles$Role == "Vice-President")) {
    prespos <- which(dat$JUDGE == roles$Judge[i] & dat$HEARINGDATE1 > roles$Start[i] & dat$HEARINGDATE1 < roles$End[i])
    dat$isVP[prespos] <- TRUE
}

### First pass: check for Presidents, vice-Presidents
dat <- dat %>%
    group_by(NEUTRAL, CASEID) %>%
    mutate(hasPres = any(isPres),
           hasVP = any(isVP))
### Second pass: check for most senior Judge, in numeric order
dat <- dat %>%
    group_by(NEUTRAL, CASEID) %>%
    mutate(isSeniorJudge = as.numeric(JUDGE == min(JUDGE[!isVP & !isPres])))

### Third pass: set to Pres/ Vice-Pres / most Senior

dat$isSeniorJudge[which(dat$isPres)] <- 1
dat$isSeniorJudge[which(!dat$hasPres & dat$isVP)] <- 1
dat$isSeniorJudge[which(dat$hasPres & !dat$isPres)] <- 0
dat$isSeniorJudge[which(!dat$hasPres & dat$hasVP & !dat$isVP)] <- 0
## summary(dat$isSeniorJudge)

### Some error checking
foo <- aggregate(dat$isSeniorJudge,
                 list(NE = dat$NEUTRAL, CA = dat$CASEID),
                 sum)
stopifnot(all(foo$x==1))
rm(foo)



### (1) Previous agreement with senior judge
dat <- dat %>%
    group_by(NEUTRAL, CASEID) %>%
    mutate(SeniorJudge = JUDGE[which(isSeniorJudge==1)])

### Go over neutrals, transform to diss
### If there are more than two entries, 
### it's a (partial) dissent

mult2diss <- function(x) { 
	dissented <- all(x==0)
	part_dissented <- length(table(x)) > 1
	return(as.numeric(!dissented & !part_dissented))
}


holder$DISS <- ifelse(holder$DISS == "C",
                      1,
                      0)


voteholder <- holder %>% group_by(NEUTRAL, JUDGE, HANDDOWN) %>%
	summarize(DISS = mult2diss(DISS))

votemat <- acast(voteholder, NEUTRAL + HANDDOWN ~ JUDGE, 
	value.var = "DISS")
presence.mat <- !is.na(votemat)
votemat[is.na(votemat)] <- 0

dates <- as.Date(gsub(".*_","",rownames(votemat)))

res <- lapply(unique(dates), function(d) {
    tmpvotemat <- votemat[dates < d,]
    tmppresencemat <- presence.mat[dates<d,]
    times_sat_together <- crossprod(tmppresencemat, tmppresencemat)
    times_agreed <- crossprod(tmpvotemat, tmpvotemat)
    times_sat_together <- melt(times_sat_together,
                               value.name = "sat_together")
    times_agreed <- melt(times_agreed,
                               value.name = "agreed")
    retval <- merge(times_sat_together, times_agreed, all = TRUE)
    retval$date <- d
    return(retval)
})

prior_agreement <- do.call("rbind", res)
### Expand prior agreement to include intermediate dates
prior_agreement <- prior_agreement %>%
    complete(Var1, Var2, date  = seq(min(date),max(date),1)) %>%
    group_by(Var1, Var2) %>%
    mutate(sat_together = na.locf(sat_together, na.rm = FALSE),
           agreed = na.locf(agreed, na.rm = FALSE))

old <- nrow(dat)
dat <- merge(dat, prior_agreement,
             by.x = c("JUDGE", "SeniorJudge", "HEARINGDATE1"),
             by.y = c("Var1", "Var2", "date"),
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

dat$prior_agreement <- (dat$agreed + 1)/(dat$sat_together + 1)
dat$prior_agreement[which(dat$JUDGE == dat$SeniorJudge)] <- 1
dat$prior_agreement[is.na(dat$prior_agreement)] <- 1

### (1b) Prior dissent
prior_dissent <- voteholder %>%
    group_by(JUDGE) %>%
    arrange(HANDDOWN) %>%
    mutate(cumdiss = cumsum(DISS == 0),
           prior_dissent = lag(cumdiss)/0:(n()-1)) %>%
    mutate(prior_dissent = coalesce(prior_dissent, 0)) %>% 
### Expand prior agreement to include intermediate dates
    complete(JUDGE, HANDDOWN = seq(min(HANDDOWN),max(HANDDOWN),1)) %>%
    group_by(JUDGE) %>%
    mutate(prior_dissent = na.locf(prior_dissent, na.rm = FALSE)) %>%
    dplyr::select(JUDGE, HANDDOWN, prior_dissent)

dat <- merge(dat, prior_dissent,
             by.x = c("JUDGE", "HEARINGDATE1"),
             by.y = c("JUDGE", "HANDDOWN"),
             all.x = TRUE,
             all.y = FALSE)

### (2) Agreement with panel
### Use the prior agreement data
### For each panel (each unique NEUTRAL / Date combo)
### Expand to generate all pairwise combinations of judges
### Labelled judge A and judge B
### Drop out the most senior judge from judge B
### Merge with prior_agreement
### For each neutral, and
### For each judge A, average dissent over the other judge Bs
old <- nrow(dat)
dat <- dat %>%
    group_by(HEARINGDATE1, NEUTRAL, SeniorJudge) %>%
    tidyr::expand(JudgeA = JUDGE, JudgeB = JUDGE) %>%
    filter(JudgeA != JudgeB) %>% 
    filter(JudgeB != SeniorJudge) %>%
    left_join(prior_agreement,
              by = c("JudgeA" = "Var1",
                     "JudgeB" = "Var2",
                     "HEARINGDATE1" = "date")) %>%
    mutate(sat_together = coalesce(sat_together, 0),
           agreed = coalesce(agreed, 0),
           prior_agreement = (1 + agreed)/(1 + sat_together)) %>%
    group_by(NEUTRAL, JudgeA) %>%
    summarize(panel_agreement = mean(prior_agreement)) %>%
    dplyr::select(NEUTRAL, JudgeA, panel_agreement) %>%
    right_join(dat, by = c("JudgeA" = "JUDGE",
                           "NEUTRAL" = "NEUTRAL"))

new <- nrow(dat)
stopifnot(old == new)

names(dat) <- sub("JudgeA", "JUDGE", names(dat))

### (3) Specialism in this area
old <- nrow(dat)
dat <- merge(dat, sheet1[,c("CASEID", "NEUTRAL", "Area")],
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

dat$Area <- car:::recode(dat$Area,
                         "'Chancery'='Tax.and.Chancery';'N Ireland'='NI'")

specm <- melt(spec[,-2],
             id.var = "Judge")
names(specm) <- c("Judge", "Area", "Specialisation")
old <- nrow(dat)
dat <- merge(dat, specm,
             by.x = c("JUDGE","Area"),
             by.y = c("Judge","Area"),
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

old <- nrow(dat)
dat <- merge(dat, spec,
             by.x = c("JUDGE"),
             by.y = c("Judge"),
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

### (4) workload
old <- nrow(dat)
dat <- merge(dat, individualWorkload,
             by.x = c("JUDGE", "HEARINGDATE1"),
             by.y = c("Judge", "Date"),
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

### (5) tenure
dat$freshman <- as.numeric(dat$DaysExperience < 365.25)
dat$Seniority <- dat$DaysExperience / 365.25

### (6) Case importance
old <- nrow(dat)
dat <- merge(dat, sheet1[,c("CASEID", "NEUTRAL", "Importance")],
             all.x = TRUE,
             all.y = FALSE)
new <- nrow(dat)
stopifnot(old == new)

### (7) Temporary members
dat$Temporary <- ((dat$JUDGE > 20)|(dat$HEARINGDATE1 > dat$End))
dat$workloadTotal[is.na(dat$workloadTotal)] <- mean(dat$workloadTotal, na.rm = TRUE)
dat$Seniority[is.na(dat$Seniority)] <- 0
dat <- dat %>%
    group_by(Area) %>%
    mutate(Specialisation = fillin(Specialisation))

### Weights
dat <- dat %>%
    group_by(NEUTRAL) %>%
    mutate(w8 = 1 / length(unique(CASEID)))


## ----judexp, eval = TRUE-------------------------------------------------
### Rather awkward stuff here
### See http://stackoverflow.com/questions/15698399/cumulative-count-of-unique-values-in-r
### for the background
dat <- dat %>%
    group_by(JUDGE) %>%
    arrange(HANDDOWN, NEUTRAL) %>%
    mutate(experience = cummax(as.numeric(factor(as.character(NEUTRAL), levels = unique(as.character(NEUTRAL))))))



## ----modelling-----------------------------------------------------------
dat <- dat %>%
    group_by(CASEID, NEUTRAL) %>%
    mutate(NoDissents = all(DISS == "C"))

dat$isSenior <- as.numeric(dat$JUDGE == dat$SeniorJudge)
dat$isTemporary <- ((dat$JUDGE > 20) |
                    (dat$HEARINGDATE1 < dat$Start) |
                    (dat$HEARINGDATE1 > dat$End))

dat$isTemporary <- as.numeric(dat$isTemporary)
dat$JUDGE <- factor(dat$JUDGE,
                    levels = sort(unique(dat$JUDGE)))

dat$Phillips <- as.numeric(dat$HEARINGDATE1 < as.Date("2012-10-01"))
### Non-dissenting judges only
dat <- subset(dat, DISS == "C")

### Create strata for rstanarm
dat$strata <- with(dat, interaction(CASEID, NEUTRAL))

### Round things up

dat$panel_agreement <- 100 * dat$panel_agreement
dat$prior_agreement <- 100 * dat$prior_agreement
dat$prior_dissent <- 100 * dat$prior_dissent

## Combine for the purposes of modelling
model.df <- dat %>%
    group_by(NEUTRAL, JUDGE) %>%
    summarize(Lead = max(Lead, na.rm = TRUE),
              Specialisation = weighted.mean(Specialisation,
                                             w8,
                                             na.rm = TRUE),
              Importance = weighted.mean(Importance,
                                         w8,
                                         na.rm = TRUE),
              workloadTotal = weighted.mean(workloadTotal,
                                            w8,
                                            na.rm = TRUE),
              isSeniorJudge = max(isSeniorJudge, na.rm = TRUE),
              isTemporary = max(isTemporary, na.rm = TRUE),
              Seniority = max(Seniority, na.rm = TRUE),
              prior_agreement = weighted.mean(prior_agreement,
                                              w8,
                                              na.rm = TRUE),
              prior_dissent = weighted.mean(prior_dissent,
                                            w8,
                                            na.rm = TRUE),
              panel_agreement = weighted.mean(panel_agreement,
                                              w8,
                                              na.rm = TRUE))

model.df$prior_dissent[is.na(model.df$prior_dissent)] <- 0
model.df$isTemporary[!is.finite(model.df$isTemporary)] <- 0

### Rescale vars
model.df.bak <- model.df
vars <- c("Importance", "Specialisation",
          "workloadTotal", "Seniority",
          "prior_agreement",
          "panel_agreement",
          "prior_dissent")

means_std_vars <- numeric()
sd_std_vars <- numeric()
for (v in vars) {
    means_std_vars <- c(means_std_vars,
                        mean(model.df[[v]], na.rm = TRUE))
    sd_std_vars <- c(sd_std_vars,
                     sd(model.df[[v]], na.rm = TRUE))
    model.df[[v]] <- arm::rescale(as.numeric(model.df[[v]]))
}



## ----doallmods, message = FALSE------------------------------------------
if (file.exists("cache/06_cached_results.RData")) {
    load("cache/06_cached_results.RData")
} else {

    my.iter <- 600
### LEGAL MODELS
### FREQUENTIST
    
    clmod.legal <- mclogit(cbind(Lead, factor(NEUTRAL)) ~
                               Specialisation * Importance,
                           random = ~1|JUDGE,
                           data = model.df)
    
### RSTANARM
    if (FALSE) {
        legalmod.rstanarm <- stan_glmer(Lead ~ Specialisation *
                                            Importance +
                                            (1|NEUTRAL) +
                                            (1|JUDGE) - 1,
                                        data = model.df,
                                        family = poisson,
                                        ## chains = 2,
                                        ## cores = 2,
                                        seed = 1982,
                                        iter = my.iter,
                                        algorithm = "meanfield",
                                        #control = list(adapt_delta = 2),
                                        prior = student_t(7, 0, 10))
    }
    
### ORGANISATIONAL MODELS
### FREQUENTIST
    
    clmod.org <- update(clmod.legal, . ~ . - Specialisation - Specialisation:Importance - Specialisation*Importance +
                                         workloadTotal * Importance +
                                         isSeniorJudge * Importance +
                                         isTemporary * Importance +
                                         Seniority * Importance)


### RSTANARM
    if (FALSE) { 
    orgmod.rstanarm <- stan_glmer(Lead ~ workloadTotal * Importance +
                                       isSeniorJudge * Importance +
                                       isTemporary * Importance +
                                       Seniority * Importance +
                                       (1|NEUTRAL) + (1|JUDGE) - 1,
                                  data = model.df,
                                  family = poisson,
                                  iter = my.iter,
                                  algorithm = "meanfield",
                                        #chains = 2, cores = 2,
                                  seed = 1982)
    }
    
### POLITICAL MODELS
### FREQUENTIST
        
    clmod.political <- update(clmod.legal, . ~ . - Specialisation - Specialisation:Importance +
                                               isSeniorJudge * Importance +
                                               prior_agreement * Importance +
                                               panel_agreement * Importance +
                                               prior_dissent * Importance)

### RSTANARM
    if (FALSE) { 
        polmod.rstanarm <- stan_glmer(Lead ~ isSeniorJudge * Importance +
                                          prior_agreement * Importance +
                                          panel_agreement * Importance +
                                          prior_dissent * Importance + 
                                          (1|JUDGE) +
                                          (1|NEUTRAL) - 1,
                                      family = poisson,
                                      prior = student_t(7, 0, 10),
                                      algorithm = "meanfield",
                                      data = model.df,
                                      iter = my.iter,
                                      ## chains = 2,
                                      ## cores = 2,
                                      seed = 1982)
    }
    

### COMBINED MODELS
### FREQUENTIST
    
    clmod.comb <- update(clmod.legal, . ~ . +
                                          workloadTotal +
                                          workloadTotal:Importance +
                                          isSeniorJudge +
                                          isSeniorJudge:Importance +
                                          isTemporary +
                                          isTemporary:Importance +
                                          Seniority +
                                          Seniority:Importance +
                                          prior_agreement +
                                          prior_agreement:Importance +
                                          panel_agreement +
                                          panel_agreement:Importance +
                                          prior_dissent +
                                          prior_dissent:Importance)

### RSTANARM
    if (FALSE) { 
        combmod.rstanarm <- stan_glmer(Lead ~ Specialisation +
                                           Specialisation:Importance +
                                           workloadTotal +
                                       workloadTotal:Importance +
                                       isSeniorJudge + 
                                       isSeniorJudge:Importance +
                                       isTemporary +
                                       isTemporary:Importance +
                                       Seniority +
                                       Seniority:Importance +
                                       prior_agreement +
                                       prior_agreement:Importance +
                                       panel_agreement +
                                       panel_agreement:Importance +
                                       prior_dissent +
                                       prior_dissent:Importance + 
                                       (1|JUDGE) +
                                       factor(NEUTRAL) - 1,
                                   data = model.df,
                                   ## chains = 2,
                                   ## cores = 2,
                                   iter = my.iter,
                                   algorithm = "meanfield",
                                   seed = 1982)

        }
    save(list = c("clmod.legal",
                  "clmod.org",
                  "clmod.political",
                  "clmod.comb"),
                  ## "legalmod.rstanarm",
                  ## "orgmod.rstanarm",
                  ## "polmod.rstanarm",
                  ## "combmod.rstanarm")
         file = "cache/06_cached_results.RData")
}



## ----legalplotch6, fig = TRUE, fig.cap = "Legal model of lead opinion writing. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 2.5----
## Figure height should be what, (nCoefs + 2) / 2.5?

legalmod.labs <- list("(Intercept)" = "Intercept",
                      "Specialisation" = "Specialisation",
                      "Specialisation:Importance" = "Specialisation × importance",
                      "Importance" = "Importance")


                                        #mycoefplot.mult(list(legalmod.rstanarm, legalmod.rstanarm_alt),
mycoefplot(clmod.legal,
           ylab = "Coefficient value",
           drop = "Importance",
           ylim = c(-1, 4),
           variable.labels = legalmod.labs, 
           sec.ylab = "Odds are ... times greater")

## spec.coef <- round(exp(fixef(legalmod.rstanarm)[1]))
## spec.int <- exp(posterior_interval(legalmod.rstanarm)[1,])
## spec.int <- c(floor(spec.int[1]), ceiling(spec.int[2]))

spec.coef <- coef(clmod.legal)["Specialisation"]
spec.se <- summary(clmod.legal)$coef["Specialisation", "Std. Error"]
spec.lo <- spec.coef - 1.96 * spec.se
spec.hi <- spec.coef + 1.96 * spec.se

spec.coef <- round(exp(spec.coef), 2) 
spec.int <- c(round(exp(spec.lo), 2),
              round(exp(spec.hi), 2))



## ----orgplotch6, fig = TRUE, fig.cap = "Organisational model of lead opinion writing. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 5----
## Figure height should be what, (nCoefs + 2) / 2.5?
orgmod.labs <- list("(Intercept)" = "Intercept",
                    "isSeniorJudge" = "Presiding judge",
                    "isSeniorJudge:Importance" = "Presiding judge × importance",
                    "Importance:isSeniorJudge" = "Presiding judge × importance",
                    "Seniority" = "Seniority (years on court)",
                    "Seniority:Importance" = "Seniority × importance",
                    "Importance:Seniority" = "Seniority × importance",
                    "workloadTotal" = "Workload",
                    "workloadTotal:Importance" = "Workload × importance",
                    "Importance:workloadTotal" = "Workload × importance",
                    "isTemporary" = "Temporary judge",
                    "isTemporary:Importance" = "Temporary judge × importance",
                    "Importance:isTemporary" = "Temporary judge × importance")

                                        #mycoefplot.mult(list(orgmod.rstanarm, orgmod.rstanarm_alt),
mycoefplot(clmod.org,
                ylab = "Coefficient value",
                drop = "Importance",
                variable.labels = orgmod.labs, 
                sec.ylab = "Odds are ... times greater")

wl.coef <- coef(clmod.org)["workloadTotal"]
wl.se <- summary(clmod.org)$coef["workloadTotal", "Std. Error"]
wl.lo <- wl.coef + 1.96 * wl.se
wl.hi <- wl.coef + 1.96 * wl.se

wl.coef <- round(exp(wl.coef), 2) 
wl.int <- c(round(exp(wl.lo), 2),
              round(exp(wl.hi), 2))

pres.coef <- coef(clmod.org)["isSeniorJudge"]
pres.se <- summary(clmod.org)$coef["isSeniorJudge", "Std. Error"]
pres.lo <- pres.coef - 1.96 * pres.se
pres.hi <- pres.coef + 1.96 * pres.se

pres.coef <- round(exp(pres.coef), 2) 
pres.int <- c(round(exp(pres.lo), 2),
              round(exp(pres.hi), 2))

senior.coef <- coef(clmod.org)["Seniority"]
senior.se <- summary(clmod.org)$coef["Seniority", "Std. Error"]
senior.lo <- senior.coef - 1.96 * senior.se
senior.hi <- senior.coef + 1.96 * senior.se

senior.coef <- round(exp(senior.coef), 2) 
senior.int <- c(round(exp(senior.lo), 2),
              round(exp(senior.hi), 2))



## ----polplotch6, fig = TRUE, fig.cap = "Political model of lead opinion writing.  Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 4----

polmod.labs <- list("prior_agreement" = "Agreement with presiding judge",
                    "isSeniorJudge" = "Presiding judge",
                    "panel_agreement" = "Agreement with rest of panel",
                    "prior_agreement:Importance" = "Agreement with presiding judge × importance",
                    "isSeniorJudge:Importance" = "Presiding judge × importance",
                    "panel_agreement:Importance" = "Agreement with rest of panel × importance",
                    "Importance:prior_agreement" = "Agreement with presiding judge × importance",
                    "Importance:isSeniorJudge" = "Presiding judge × importance",
                    "Importance:panel_agreement" = "Agreement with rest of panel × importance",
                    "Importance:prior_dissent" = "Prior dissent × importance",
                    "prior_dissent:Importance" = "Prior dissent × importance",
                    "prior_dissent" = "Prior dissent")

## Figure height should be what, (nCoefs + 2) / 2.5?
                                        #mycoefplot.mult(list(polmod.rstanarm, polmod.rstanarm_alt),
mycoefplot(clmod.political,
                ylab = "Coefficient value",
                drop = "Importance",
                variable.labels = polmod.labs, 
                sec.ylab = "Odds are ... times greater")

agpres.coef <- coef(clmod.political)["prior_agreement"]
agpres.se <- summary(clmod.political)$coef["prior_agreement", "Std. Error"]
agpres.lo <- agpres.coef - 1.96 * agpres.se
agpres.hi <- agpres.coef + 1.96 * agpres.se

agpres.coef <- round(exp(agpres.coef), 2) 
agpres.int <- c(round(exp(agpres.lo), 2),
              round(exp(agpres.hi), 2))


agpanel.coef <- coef(clmod.political)["panel_agreement"]
agpanel.se <- summary(clmod.political)$coef["panel_agreement", "Std. Error"]
agpanel.lo <- agpanel.coef - 1.96 * agpanel.se
agpanel.hi <- agpanel.coef + 1.96 * agpanel.se

agpanel.coef <- round(exp(agpanel.coef), 2) 
agpanel.int <- c(round(exp(agpanel.lo), 2),
              round(exp(agpanel.hi), 2))



## ----combplotch6, fig = TRUE, fig.cap = "Combined model of lead opinion writing.  Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 7.5----
## Figure height should be what, (nCoefs + 2) / 2.5?

combmod.labs <- c(legalmod.labs, orgmod.labs, polmod.labs)
combmod.labs <- combmod.labs[!duplicated(combmod.labs)]
### Add on interaction terms with reverse order
addons <- combmod.labs
names(addons) <- gsub("(.*):(.*)", "\\2:\\1", names(combmod.labs))
combmod.labs <- c(combmod.labs, addons)


## mycoefplot.mult(list(combmod.rstanarm, combmod.rstanarm_alt),
mycoefplot(clmod.comb,
           ylab = "Coefficient value",
           drop = "Importance",
           variable.labels = combmod.labs,
           sec.ylab = "Odds are ... times greater")



## ----simsection, eval = TRUE, results = "asis"---------------------------
predictprob <- function(mod, newdata = NULL) {
    pr <- posterior_linpred(mod, newdata = newdata,
                            transform = TRUE)
    pr[,1]
}

predictprob <- function(mod, newdata = NULL) {
    mat <- model.matrix(formula(mod),
                        data = newdata)
    
    if (inherits(mod, "mclogitRandeff")) {
        require(mvtnorm)
        b <- rmvnorm(n = 2000,
                     mean = coef(mod),
                     sigma = vcov(mod)[1:length(coef(mod)), 1:length(coef(mod))])
        b <- cbind(1, b)
        colnames(b)[1] <- "(Intercept)"
    } else {
        b <- as.matrix(mod)
    }
    
    res <- tcrossprod(b, mat)
    res <- exp(res)
    res <- res / rowSums(res, na.rm = TRUE)
    res[,1]
}

cf <- function(mod, old, new, alpha = 0.05) {
    oldprobs <- predictprob(mod, newdata = old)
    newprobs <- predictprob(mod, newdata = new)
    oldprobs <- oldprobs
    newprobs <- newprobs
    diff <- newprobs - oldprobs
    return(data.frame(mean = median(diff),
                      llo = quantile(diff, 1/12),
                      hhi = quantile(diff, 11/12),
                      lo = quantile(diff, alpha/2),
                      hi = quantile(diff, 1 - alpha / 2)))
}

low_imp <- with(model.df,
             data.frame(Specialisation = rep(0, 5),
                        workloadTotal = rep(mean(workloadTotal), 5),
                        isSeniorJudge = rep(0, 5),
                        isTemporary = rep(0, 5), 
                        Seniority = rep(0, 5),
                        Importance = rep(0, 5),
                        JUDGE = rep("6", 5),
                        CASEID = rep("UKSC 2017/0031", 5),
                        NEUTRAL = rep("[2017] UKSC 54", 5),
                        Lead = rep(1/5, 5),
                        prior_agreement = rep(mean(prior_agreement), 5),
                        panel_agreement = rep(mean(panel_agreement), 5),
                        prior_dissent = rep(0, 5)))

high_imp <- low_imp
high_imp$Importance <- max(model.df$Importance)


### (a) Agreement w/ presiding
inc_agpres_low <- low_imp
inc_agpres_high <- high_imp
inc_agpres_low$prior_agreement[1] <- 
    inc_agpres_high$prior_agreement[1] <- -1

agpres.chg.lo <- cf(clmod.comb, low_imp, inc_agpres_low)
agpres.chg.hi <- cf(clmod.comb, high_imp, inc_agpres_high)

### (b) Agreement w/ panel
inc_agpanel_low <- low_imp
inc_agpanel_high <- high_imp

inc_agpanel_low$panel_agreement[1] <- 
    inc_agpanel_high$panel_agreement[1] <- -1

agpanel.chg.lo <- cf(clmod.comb, low_imp, inc_agpanel_low)
agpanel.chg.hi <- cf(clmod.comb, high_imp, inc_agpanel_high)

### (c) Specialisation
inc_spec_low <- low_imp
inc_spec_high <- high_imp
inc_spec_low$Specialisation[1] <- 1
inc_spec_high$Specialisation[1] <- 1

spec.chg.lo <- cf(clmod.comb, low_imp, inc_spec_low)
spec.chg.hi <- cf(clmod.comb, high_imp, inc_spec_high)

### (d) Temporary
inc_temp_low <- low_imp
inc_temp_high <- high_imp
inc_temp_low$isTemporary[1] <- 1
inc_temp_high$isTemporary[1] <- 1

temp.chg.lo <- cf(clmod.comb, low_imp, inc_temp_low)
temp.chg.hi <- cf(clmod.comb, high_imp, inc_temp_high)

### (e) freshman
inc_fresh_low <- low_imp
inc_fresh_high <- high_imp
inc_fresh_low$Seniority[1] <- 1
inc_fresh_high$Seniority[1] <- 1

fresh.chg.lo <- cf(clmod.comb, low_imp, inc_fresh_low)
fresh.chg.hi <- cf(clmod.comb, high_imp, inc_fresh_high)

### (f) workload
inc_work_low <- low_imp
inc_work_high <- high_imp
inc_work_low$workloadTotal[1] <-
    inc_work_high$workloadTotal[1] <- 1

work.chg.lo <- cf(clmod.comb, low_imp, inc_work_low)
work.chg.hi <- cf(clmod.comb, high_imp, inc_work_high)


### Now need to construct a matrix which has nConditions * 3 rows
### and two columns
### How to combine all these?
out <- rbind(agpres.chg.lo, agpres.chg.hi,
             agpanel.chg.lo, agpanel.chg.hi,
             spec.chg.lo, spec.chg.hi,
             temp.chg.lo, temp.chg.hi,
             fresh.chg.lo, fresh.chg.hi,
             work.chg.lo, work.chg.hi)

out <- as.data.frame(out)
changes <- c("Average (91%) to low (73%) agreement\nwith presiding judge",
             "Average (88%) to low (74%) agreement\nwith panel",
             "Average (19% to high (73%) specialism",
             "Temporary member",
             "Average (2.5 years) to high (6.5 years) seniority",
             "20 days more work")

changes <- str_wrap(changes, 20)

out$Change <- rep(changes, each = 2)

out$Importance <- rep(c("Low importance", "High importance"),
                      times = length(changes))
out$Importance <- factor(out$Importance,
                         levels = c("Low importance", "High importance"),
                         ordered = TRUE)

out$Change <- factor(out$Change,
                     ordered = TRUE)
out$Change <- reorder(out$Change,
                      out$mean)



## ----predchanges, fig = TRUE, fig.cap = "Changes in the predicted probability of writing the lead opinion, according to certain specific changes in one judge. ", fig.width = 6, fig.height = 6----

out$isSignificant <- apply(out[,c("lo", "hi")], 1, isSig)

out$inner.bar.col <- ifelse(out$isSignificant,
                            inner.bar.col,
                            "lightgrey")
out$outer.bar.col <- ifelse(out$isSignificant,
                            outer.bar.col,
                            "darkgrey")

ggplot(out, aes(x = Change,
                y = mean, ymin = lo, ymax = hi,
                group = Importance)) +
    scale_x_discrete("") +
    scale_y_continuous("Change in\nprobability of being selected\n(percentage points)") + 
    geom_linerange(aes(colour = inner.bar.col),
                   position = position_dodge(width = dodge.width),
                   size=inner.bar.size, 
                   alpha = 0.8) +
    geom_linerange(aes(ymin = llo, ymax = hhi,
                       colour = outer.bar.col),
                   position = position_dodge(width = dodge.width),
                   size = outer.bar.size,
                   alpha = 0.8) +
    geom_hline(aes(x = 0, yintercept = 0), lty = 1) +
    geom_point(aes(colour = outer.bar.col, shape = Importance),
               position = position_dodge(width = dodge.width),
               size= (inner.bar.size + outer.bar.size) * 2.5,
               stroke = 1.5) +
    scale_shape_manual("Importance", values = my.pchs) +
    scale_colour_identity() +
    scale_fill_identity() +
    theme_uksc() +
    coord_flip() +
    theme(legend.position = "bottom",
          legend.dir = "horizontal")



## ----fitstatsch6, fig = TRUE, fig.cap = "Goodness-of-fit statistics for different models. Thin lines indicate 95 percent credible intervals; thick lines indicate 83 percent credible intervals. ", warning = FALSE----

epcp.rstanarm <- function(mod) {
    
    poslp <- posterior_linpred(mod, transform = TRUE)
    poslp.m <- melt(poslp)
    poslp.m$CASEID <- mod$data$CASEID[poslp.m$Var2]
    poslp.m$Lead <- mod$data$Lead[poslp.m$Var2]
    poslp.m$JUDGE <- mod$data$JUDGE[poslp.m$Var2]
    poslp.m <- poslp.m %>%
        arrange(iterations, CASEID, JUDGE)
    
    poslp.m <- poslp.m %>%
        group_by(iterations, CASEID) %>%
        mutate(isMax = value == max(value),
               isCorrect = isMax & Lead)
    
    correct <- poslp.m %>%
        group_by(iterations, CASEID) %>%
        summarize(correct = max(isCorrect)) %>%
        group_by(iterations) %>%
        summarize(correct = mean(correct))

    correct <- correct$correct
    
    epcp <- data.frame(term = "PCP\n(bigger is better)",
                       mean = mean(correct, na.rm = TRUE),
                       lo = quantile(correct, 0.025, na.rm = TRUE),
                       hi = quantile(correct, 0.975, na.rm = TRUE),
                       llo = quantile(correct, 1/12, na.rm = TRUE),
                       hhi = quantile(correct, 11/12, na.rm = TRUE))

    the.k <- kfold(mod, K = 10)

    
    the.loo <- data.frame(term = "LOOIC\n(smaller is better)",
                          mean = the.loo$looic,
                          lo = the.loo$looic + qnorm(1/40) * the.loo$se_looic,
                          hi = the.loo$looic + qnorm(39/40) * the.loo$se_looic,
                          llo = the.loo$looic + qnorm(1/24) * the.loo$se_looic,
                          hhi = the.loo$looic + qnorm(23/24) * the.loo$se_looic)
                                        #    return(rbind(epcp, the.loo))
    return(rbind(epcp, the.loo))
}


epcp.clogit <- function(mod) {
    
###
    require(mvtnorm)
    b <- rmvnorm(n = 1000,
                 mean = coef(mod),
                 sigma = vcov(mod)[1:length(coef(mod)), 1:length(coef(mod))])
    b <- cbind(1, b)
    colnames(b)[1] <- "(Intercept)"

    mat <- model.matrix(formula(mod), mod$data)
    
    res <- tcrossprod(b, mat)
    res <- exp(res)
    res <- res / rowSums(res, na.rm = TRUE)
    res.m <- melt(res, varnames = c("iter", "row"))


    aux <- data.frame(row = rownames(mod$data),
                      NEUTRAL = mod$data$NEUTRAL,
                      Lead = mod$data$Lead)
    res.m <- merge(res.m, aux, all.x = TRUE, all.y = FALSE)
    
    res.m <- res.m %>%
        group_by(iter, NEUTRAL) %>%
        mutate(isMax = value == max(value),
               isCorrect = isMax & Lead)
    
    correct <- res.m %>%
        group_by(iter, NEUTRAL) %>%
        summarize(correct = max(isCorrect)) %>%
        group_by(iter) %>%
        summarize(correct = mean(correct))

    correct <- correct$correct
    
    epcp <- data.frame(term = "PCP\n(bigger is better)",
                       mean = mean(correct, na.rm = TRUE),
                       lo = quantile(correct, 0.025, na.rm = TRUE),
                       hi = quantile(correct, 0.975, na.rm = TRUE),
                       llo = quantile(correct, 1/12, na.rm = TRUE),
                       hhi = quantile(correct, 11/12, na.rm = TRUE))

    the.loo <- data.frame(looic = AIC(mod),
                          se_looic = 0)
                      
    the.loo <- data.frame(term = "AIC\n(smaller is better)",
                          mean = the.loo$looic,
                          lo = the.loo$looic + qnorm(1/40) * the.loo$se_looic,
                          hi = the.loo$looic + qnorm(39/40) * the.loo$se_looic,
                          llo = the.loo$looic + qnorm(1/24) * the.loo$se_looic,
                          hhi = the.loo$looic + qnorm(23/24) * the.loo$se_looic)
                                        #    return(rbind(epcp, the.loo))
    return(rbind(epcp, the.loo))
}


if (file.exists("cache/06_gof.RData")) {
    load("cache/06_gof.RData")
} else { 
    
    ## legal.stats <- epcp.rstanarm(legalmod.rstanarm)
    ## org.stats <- epcp.rstanarm(orgmod.rstanarm)
    ## pol.stats <- epcp.rstanarm(polmod.rstanarm)
    ## comb.stats <- epcp.rstanarm(combmod.rstanarm)

    legal.stats <- epcp.clogit(clmod.legal)
    org.stats <- epcp.clogit(clmod.org)
    pol.stats <- epcp.clogit(clmod.political)
    comb.stats <- epcp.clogit(clmod.comb)

    legal.stats$Model <- "Legal"
    org.stats$Model <- "Organisational"
    pol.stats$Model <- "Political"
    comb.stats$Model <- "Combined"

    null.stats <- data.frame(Model = "Null",
                             term = "PCP\n(bigger is better)",
                             ## mean = mean(combmod.rstanarm$y == 1, na.rm = TRUE))
                             mean = mean(clmod.comb$y == 0, na.rm = TRUE))
    
    gof.df <- rbind(
        rbind(
            rbind(legal.stats,
                  org.stats),
            pol.stats),
        comb.stats)
    
    gof.df <- merge(gof.df, null.stats, all = TRUE)
    
    gof.df$Model <- factor(gof.df$Model,
                           levels = rev(c("Null", "Legal",
                                          "Organisational", "Political",
                                          "Combined")),
                           ordered = TRUE)
    

    ## legal.loo <- loo(legalmod.rstanarm)
    ## combined.loo <- loo(combmod.rstanarm)
    ## political.loo <- loo(polmod.rstanarm)
    ## org.loo <- loo(orgmod.rstanarm)

    save(list = c(## "legal.loo", "combined.loo",
                  ## "political.loo", "org.loo",
                  "gof.df"),
         file = "cache/06_gof.RData")
    
}

ggplot(gof.df, aes(x = Model, y = mean, ymax = hi, ymin = lo)) +
    geom_linerange(size = inner.bar.size,
                   colour = inner.bar.col,
                   alpha = 0.8) +
        geom_linerange(aes(ymin = llo, ymax = hhi),
                       size = outer.bar.size,
                       colour = outer.bar.col,
                       alpha = 0.8) +
    geom_point(aes(colour = outer.bar.col),
               size= (inner.bar.size + outer.bar.size) * 2.5,
               shape= my.pchs[1],
               stroke = 1.5) +
    scale_colour_identity() + 
    facet_wrap(~term, scales = "free_x") +
    scale_y_continuous("Value") +
    coord_flip() +
    theme_uksc()

combined.pcp <- subset(gof.df,
                       Model == "Combined" & grepl("PCP", term)) %>%
    pull(mean)

combined.pcp <- round(100 * combined.pcp)


## ----modelling2----------------------------------------------------------
### If you're troubleshooting convergence errors, use
### source("allFit.R")
dat2 <- dat %>%
    group_by(NEUTRAL) %>%
    filter(CASEID == CASEID[1])
                          
vars <- c("Importance",
          "Specialisation",
          "workloadTotal",
          "Seniority",
          "prior_agreement",
          "panel_agreement",
          "prior_dissent")
            
means_std_vars <- numeric()
sd_std_vars <- numeric()
for (v in vars) {
    means_std_vars <- c(means_std_vars,
                        mean(dat2[[v]], na.rm = TRUE))
    sd_std_vars <- c(sd_std_vars,
                        sd(dat2[[v]], na.rm = TRUE))
    dat2[,v] <- arm::rescale(dat2[[v]])
}

names(means_std_vars) <- names(sd_std_vars) <- vars

keep.vars <- c("Wrote",
               "Lead",
               "Importance",
               "Specialisation",
               "workloadTotal",
               "isSeniorJudge",
               "isTemporary",
               "Seniority",
               "prior_agreement",
               "panel_agreement",
               "prior_dissent",
               "Phillips",
               "JUDGE",
               "NEUTRAL")

class(dat2) <- "data.frame"
tmp <- amelia(dat2[, keep.vars], m = 1,
              boot.type = "none",
              idvars = c("NEUTRAL"),
              noms = c("JUDGE"))

dat2 <- tmp$imputations[[1]]


if (file.exists("cache/06_cached_results2.RData")) {
    load("cache/06_cached_results2.RData")
} else {

### Suppress warnings regarding non-integer success
    mod.legal <- stan_glmer(Wrote ~ Importance + Specialisation +
                                Specialisation:Importance +
                                (1|JUDGE) + (1|NEUTRAL),
                            family = binomial,
                            data = subset(dat2, Lead == 0),
                            chains = 2, cores = 2, seed = 1982)

    mod.org <- stan_glmer(Wrote ~ Importance + workloadTotal +
                              workloadTotal:Importance +
                              isSeniorJudge +
                              isSeniorJudge:Importance +
                              isTemporary +
                              isTemporary:Importance +
                              Seniority +
                              Seniority:Importance +
                              Phillips +
                              (1|JUDGE) + (1|NEUTRAL),
                          family = binomial,
                          data = subset(dat2, Lead == 0),
                          chains = 2, cores = 2, seed = 1982)

    mod.political <- stan_glmer(Wrote ~ Importance + prior_agreement +
                                    prior_agreement:Importance +
                                    panel_agreement +
                                    panel_agreement:Importance +
                                    prior_dissent +
                                    prior_dissent:Importance +
                                    (1|JUDGE) + (1|NEUTRAL),
                                family = binomial,
                                data = subset(dat2, Lead == 0),
                                chains = 2, cores = 2, seed = 1982)


    mod.comb <- stan_glmer(Wrote ~ Importance + Specialisation +
                               Specialisation:Importance +
                               workloadTotal +
                               workloadTotal:Importance +
                               isSeniorJudge + 
                               isSeniorJudge:Importance +
                               isTemporary +
                               isTemporary:Importance +
                               Seniority +
                               Seniority:Importance +
                               prior_agreement +
                               prior_agreement:Importance +
                               panel_agreement +
                               panel_agreement:Importance +
                               prior_dissent +
                               prior_dissent:Importance +
                               Phillips +
                               (1|JUDGE) + (1|NEUTRAL),  
                           family = binomial,
                           data = subset(dat2, Lead == 0),
                           chains = 2, cores = 2, seed = 1982)


    
    save(list = c("mod.legal", "mod.org", "mod.political", "mod.comb"),
         file = "cache/06_cached_results2.RData")
}



## ----legalplot2, fig = TRUE, fig.cap = "Legal model of substantive opinion writing. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 2.5----
## Figure height should be what, (nCoefs + 2) / 2.5?

legalmod.labs <- list("(Intercept)" = "Intercept",
                      "Specialisation" = "Specialisation",
                      "Specialisation:Importance" = "Specialisation × importance",
                      "Importance" = "Importance")


                                        #mycoefplot.mult(list(legalmod.rstanarm, legalmod.rstanarm_alt),
mycoefplot(mod.legal,
                ylab = "Coefficient value",
                ylim = c(-1, 4),
                variable.labels = legalmod.labs, 
                sec.ylab = "Odds are ... times greater")

## spec.coef <- round(exp(fixef(legalmod.rstanarm)[1]))
## spec.int <- exp(posterior_interval(legalmod.rstanarm)[1,])
## spec.int <- c(floor(spec.int[1]), ceiling(spec.int[2]))
spec.coef <- NA
spec.int <- NA



## ----orgplot2, fig = TRUE, fig.cap = "Organisational model of substantive opinion writing. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 5----
## Figure height should be what, (nCoefs + 2) / 2.5?
orgmod.labs <- list("(Intercept)" = "Intercept",
                    "isSeniorJudge" = "Presiding judge",
                    "isSeniorJudge:Importance" = "Presiding judge × importance",
                    "Importance:isSeniorJudge" = "Presiding judge × importance",
                    "Seniority" = "Seniority",
                    "Seniority:Importance" = "Seniority × importance",
                    "Importance:Seniority" = "Seniority × importance",
                    "workloadTotal" = "Workload",
                    "workloadTotal:Importance" = "Workload × importance",
                    "Importance:workloadTotal" = "Workload × importance",
                    "isTemporary" = "Temporary judge",
                    "isTemporary:Importance" = "Temporary judge × importance",
                    "Importance:isTemporary" = "Temporary judge × importance")

                                        #mycoefplot.mult(list(orgmod.rstanarm, orgmod.rstanarm_alt),
mycoefplot(mod.org,
                ylab = "Coefficient value",
                variable.labels = orgmod.labs, 
                sec.ylab = "Odds are ... times greater")



## ----polplot2, fig = TRUE, fig.cap = "Political model of substantive opinion writing. Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 5----

polmod.labs <- list("prior_agreement" = "Agreement with presiding judge",
                    "isSeniorJudge" = "Presiding judge",
                    "panel_agreement" = "Agreement with rest of panel",
                    "prior_agreement:Importance" = "Agreement with presiding judge × importance",
                    "isSeniorJudge:Importance" = "Presiding judge × importance",
                    "panel_agreement:Importance" = "Agreement with rest of panel × importance",
                    "Importance:prior_agreement" = "Agreement with presiding judge × importance",
                    "Importance:isSeniorJudge" = "Presiding judge × importance",
                    "Importance:panel_agreement" = "Agreement with rest of panel × importance",
                    "Importance:prior_dissent" = "Prior dissent × importance",
                    "prior_dissent:Importance" = "Prior dissent × importance",
                    "prior_dissent" = "Prior dissent")

## Figure height should be what, (nCoefs + 2) / 2.5?
                                        #mycoefplot.mult(list(polmod.rstanarm, polmod.rstanarm_alt),
mycoefplot(mod.political, 
                ylab = "Coefficient value",
                variable.labels = polmod.labs, 
                sec.ylab = "Odds are ... times greater")



## ----combplot2, fig = TRUE, fig.cap = "Combined model of substantive opinion writing.  Thick lines indicate 83 percent credible intervals. Thin lines indicate 95 percent credible intervals. ", fig.width = 6, fig.height = 6----
## Figure height should be what, (nCoefs + 2) / 2.5?

combmod.labs <- c(legalmod.labs, orgmod.labs, polmod.labs)
combmod.labs <- combmod.labs[!duplicated(combmod.labs)]
### Add on interaction terms with reverse order
addons <- combmod.labs
names(addons) <- gsub("(.*):(.*)", "\\2:\\1", names(combmod.labs))
combmod.labs <- c(combmod.labs, addons)


## mycoefplot.mult(list(combmod.rstanarm, combmod.rstanarm_alt),
mycoefplot(mod.comb,
           ylab = "Coefficient value",
           variable.labels = combmod.labs,
           sec.ylab = "Odds are ... times greater")



## ----simsection2, eval = TRUE, results = "asis"--------------------------
predictprob <- function(mod, newdata = NULL) {
    pr <- posterior_linpred(mod,
                            newdata = newdata,
                            transform = TRUE)
    plogis(pr[,1])
}

low_imp <- with(model.df,
             data.frame(Specialisation = rep(0, 5),
                        workloadTotal = rep(mean(workloadTotal), 5),
                        isSeniorJudge = rep(0, 5),
                        isTemporary = rep(0, 5), 
                        Seniority = rep(0, 5),
                        Importance = rep(0, 5),
                        Phillips = rep(0, 5),
                        JUDGE = rep("6", 5),
                        CASEID = rep("UKSC 2017/0031", 5),
                        NEUTRAL = rep("[2017] UKSC 54", 5),
                        Lead = rep(1/5, 5),
                        prior_agreement = rep(mean(prior_agreement), 5),
                        panel_agreement = rep(mean(panel_agreement), 5),
                        prior_dissent = rep(0, 5)))

high_imp <- low_imp
high_imp$Importance <- max(model.df$Importance)


### (a) Agreement w/ presiding
inc_agpres_low <- low_imp
inc_agpres_high <- high_imp
inc_agpres_low$prior_agreement[1] <- 
    inc_agpres_high$prior_agreement[1] <- 1

agpres.chg.lo <- cf(mod.comb, low_imp, inc_agpres_low)
agpres.chg.hi <- cf(mod.comb, high_imp, inc_agpres_high)

### (b) Agreement w/ panel
inc_agpanel_low <- low_imp
inc_agpanel_high <- high_imp

inc_agpanel_low$panel_agreement[1] <- 
inc_agpanel_high$panel_agreement[1] <- 1

agpanel.chg.lo <- cf(mod.comb, low_imp, inc_agpanel_low)
agpanel.chg.hi <- cf(mod.comb, high_imp, inc_agpanel_high)

### (c) Specialisation
inc_spec_low <- low_imp
inc_spec_high <- high_imp
inc_spec_low$Specialisation[1] <- 
    inc_spec_high$Specialisation[1] <- 1

spec.chg.lo <- cf(mod.comb, low_imp, inc_spec_low)
spec.chg.hi <- cf(mod.comb, high_imp, inc_spec_high)

### (d) Temporary
inc_phillips_low <- low_imp
inc_phillips_high <- high_imp
inc_phillips_low$Phillips[1] <- 1
inc_phillips_high$Phillips[1] <- 1

phillips.chg.lo <- cf(mod.comb, low_imp, inc_phillips_low)
phillips.chg.hi <- cf(mod.comb, high_imp, inc_phillips_high)

### (e) freshman
inc_fresh_low <- low_imp
inc_fresh_high <- high_imp
inc_fresh_low$Seniority[1] <- 1
inc_fresh_high$Seniority[1] <- 1

fresh.chg.lo <- cf(mod.comb, low_imp, inc_fresh_low)
fresh.chg.hi <- cf(mod.comb, high_imp, inc_fresh_high)

### (f) workload
inc_work_low <- low_imp
inc_work_high <- high_imp
inc_work_low$workloadTotal[1] <- 
    inc_work_high$workloadTotal[1] <- 1

work.chg.lo <- cf(mod.comb, low_imp, inc_work_low)
work.chg.hi <- cf(mod.comb, high_imp, inc_work_high)


### Now need to construct a matrix which has nConditions * 3 rows
### and two columns
### How to combine all these?
out <- rbind(agpres.chg.lo, agpres.chg.hi,
             agpanel.chg.lo, agpanel.chg.hi,
             spec.chg.lo, spec.chg.hi,
             phillips.chg.lo, phillips.chg.hi,
             fresh.chg.lo, fresh.chg.hi,
             work.chg.lo, work.chg.hi)

out <- as.data.frame(out)
changes <- c("Average (91%) to low (73%) agreement\nwith presiding judge",
             "Average (88%) to low (74%) agreement\nwith panel",
             "Average (19%) to high (73%) specialism",
             "Phillips' period as President",
             "Average (2.5 years) to high (6.5 years) seniority",
             "20 days more work")

changes <- str_wrap(changes, 20)

out$Change <- rep(changes, each = 2)

out$Importance <- rep(c("Low importance", "High importance"),
                      times = length(changes))
out$Importance <- factor(out$Importance,
                         levels = c("Low importance", "High importance"),
                         ordered = TRUE)


## ----predchanges2, fig = TRUE, fig.cap = "Changes in the predicted probability of writing a substantive opinion, according to certain specific changes in one judge. ", fig.width = 6, fig.height = 6----

dodge.width <- 0.9

## Change bar colors
out$isSignificant <- apply(out[,c("lo", "hi")], 1, isSig)

out$inner.bar.col <- ifelse(out$isSignificant,
                            inner.bar.col,
                            "lightgrey")
out$outer.bar.col <- ifelse(out$isSignificant,
                            outer.bar.col,
                            "darkgrey")

ggplot(out, aes(x = Change,
                y = mean, ymin = lo, ymax = hi,
                group = Importance)) +
    scale_x_discrete("") +
    scale_y_continuous("Change in\nprobability of writing a substantive opinion\n(percentage points)") +
    geom_linerange(aes(colour = inner.bar.col),
                   position = position_dodge(width = dodge.width),
                   size=inner.bar.size, 
                   alpha = 0.8) +
    geom_linerange(aes(ymin = llo, ymax = hhi,
                       colour = outer.bar.col),
                   position = position_dodge(width = dodge.width),
                   size = outer.bar.size,
                   alpha = 0.8) +
    geom_hline(aes(x = 0, yintercept = 0), lty = 1) +
    geom_point(aes(colour = outer.bar.col, shape = Importance),
               position = position_dodge(width = dodge.width),
               size= (inner.bar.size + outer.bar.size) * 2.5,
               stroke = 1.5) +
    scale_shape_manual("Importance", values = my.pchs) +
    scale_colour_identity() +
    scale_fill_identity() +
    theme_uksc() +
    coord_flip() +
    theme(legend.position = "bottom",
          legend.dir = "horizontal")



## ----gof2ch6-------------------------------------------------------------
epcp.rstanarm <- function(mod) {
    poslp <- posterior_linpred(mod)
    preds <- (plogis(poslp) > 0.5)
    
    actual <- matrix(mod$y,
                     nrow = nrow(preds),
                     ncol = length(mod$y),
                     byrow = TRUE)
    correct <- (preds == actual)

    correct <- apply(correct, 1, mean)
    epcp <- data.frame(term = "PCP\n(bigger is better)",
                       mean = mean(correct),
                       lo = quantile(correct, 0.025),
                       hi = quantile(correct, 0.975),
                       llo = quantile(correct, 1/12),
                       hhi = quantile(correct, 11/12))

    the.loo <- loo(mod)
    looic <- the.loo$estimates["looic", 1]
    loose <- the.loo$estimates["looic", 2]
    the.loo <- data.frame(term = "LOOIC\n(smaller is better)",
                          mean = looic,
                          lo = looic + qnorm(1/40) * loose,
                          hi = looic + qnorm(39/40) * loose,
                          llo = looic + qnorm(1/24) * loose,
                          hhi = looic + qnorm(23/24) * loose)
    return(rbind(epcp, the.loo))
}

if (file.exists("cache/06b_gof.RData")) {
    load("cache/06b_gof.RData")
} else { 

    legal.stats <- epcp.rstanarm(mod.legal)
    org.stats <- epcp.rstanarm(mod.org)
    pol.stats <- epcp.rstanarm(mod.political)
    comb.stats <- epcp.rstanarm(mod.comb)

    legal.stats$Model <- "Legal"
    org.stats$Model <- "Organisational"
    pol.stats$Model <- "Political"
    comb.stats$Model <- "Combined"

    null.stats <- data.frame(Model = rep("Null", 2),
                             term = c("PCP\n(bigger is better)",
                                      "LOOIC\n(smaller is better)"),
                             mean = c(1 - mean(mod.legal$y,
                                               na.rm = TRUE),
                                      NA))

    gof.df <- rbind(
        rbind(
            rbind(legal.stats,
                  org.stats),
            pol.stats),
        comb.stats)

    gof.df <- merge(gof.df, null.stats, all = TRUE)

    my.levels <- gof.df %>%
        filter(grepl("LOOIC", term)) %>%
        arrange(mean) %>%
        pull(Model)

    gof.df$Model <- factor(gof.df$Model,
                           levels = rev(my.levels),
                           ordered = TRUE)
    
    legal.loo <- loo(mod.legal)
    combined.loo <- loo(mod.comb)
    political.loo <- loo(mod.political)
    org.loo <- loo(mod.org)

    save(list = c("legal.loo", "combined.loo",
                  "political.loo", "org.loo",
                  "gof.df"),
         file = "cache/06b_gof.RData")
    
}

